package profiterole.mapreduce;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.regex.Pattern;

/**
 * Loader that turns an input unit into a workable basic unit: a
 * {@link Callable} which reads the file named by the input unit and counts
 * the words in it. Each worker builds its own {@link OutputUnit} and shares no
 * mutable state with other workers, so the units can be processed
 * concurrently.
 *
 * @author Boris Farber
 *
 * @param <InputUnit> type of the input unit; its {@code toString()} value is
 *        used as the path of the file to process
 */
public class MapCallback<InputUnit> {

	/**
	 * Creates a worker for a single input unit. Nothing is read until the
	 * returned callable is invoked.
	 *
	 * @param inputUnit unit whose {@code toString()} value is the path of the
	 *        file to process
	 * @return a callable that builds the {@link OutputUnit} for that file
	 */
	public Callable<OutputUnit> makeWorker(final InputUnit inputUnit) {
		return new Callable<OutputUnit>() {

			@Override
			public OutputUnit call() throws Exception {
				return new OutputUnit(new File(inputUnit.toString()));
			}
		};
	}

	/**
	 * Result of processing one file: the words found in it and how many times
	 * each one occurs.
	 */
	public static class OutputUnit {

		/**
		 * Separator used to split a trimmed line into raw tokens. Compiled
		 * once instead of on every line.
		 */
		// TODO do more separations, the regex below has some problems
		private static final Pattern TOKEN_SEPARATOR =
				Pattern.compile("((?<=\\s\\w{1,10})[^\\w\\s])?\\.?\\s|[^\\w\\s]$");

		/** Characters removed from both the start and the end of a token. */
		private static final String EDGE_PUNCTUATION = ".?!,\"'`*()";

		/** Maximum number of characters removed from each end of a token. */
		private static final int STRIP_PASSES = 3;

		private final File key;
		private final List<String> wordList = new ArrayList<String>();
		private final HashMap<String, Integer> map;

		/**
		 * Reads the given file and counts its words.
		 *
		 * @param key file to process; if it cannot be read the stack trace is
		 *        printed and the resulting map only covers what was read
		 *        before the failure (empty when the file cannot be opened)
		 */
		public  OutputUnit(File key) {
			//TODO what to todo with the line below
			System.out.println("Processing " + key);
			this.key = key;

			fileToWords();
			map = new Combiner().reduce(wordList);
		}

		/**
		 * Reads the file line by line, splits every line into tokens, strips
		 * surrounding punctuation and appends the non-empty results to the
		 * internal word list.
		 *
		 * <p>I/O errors are reported with {@code printStackTrace()} and
		 * otherwise ignored (best effort). The constructor already calls this
		 * method once; calling it again appends the words a second time and
		 * does not refresh the map returned by {@link #getMap()}.
		 */
		public void fileToWords() {
			BufferedReader br = null;
			try {
				br = new BufferedReader(new FileReader(key));
				String line;

				while ((line = br.readLine()) != null) {
					for (String token : TOKEN_SEPARATOR.split(line.trim())) {
						String word = stripEdgePunctuation(token);
						// Blank lines, doubled spaces and punctuation-only
						// tokens produce "", which is not a word.
						if (!word.isEmpty()) {
							wordList.add(word);
						}
					}
				}
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				// br is still null when the file could not be opened.
				if (br != null) {
					try {
						br.close();
					} catch (IOException ex) {
						ex.printStackTrace();
					}
				}
			}
		}

		/**
		 * Removes up to {@link #STRIP_PASSES} punctuation characters from each
		 * end of the token. Every pass drops at most one trailing and then at
		 * most one leading character.
		 */
		private static String stripEdgePunctuation(String token) {
			String word = token;
			for (int pass = 0; pass < STRIP_PASSES; pass++) {
				if (!word.isEmpty()
						&& EDGE_PUNCTUATION.indexOf(word.charAt(word.length() - 1)) >= 0) {
					word = word.substring(0, word.length() - 1);
				}
				if (!word.isEmpty() && EDGE_PUNCTUATION.indexOf(word.charAt(0)) >= 0) {
					word = word.substring(1);
				}
			}
			return word;
		}

		/**
		 * @return the word-to-occurrence-count map computed by the constructor
		 */
		public HashMap<String, Integer> getMap() {
			return map;
		}
	}

	/**
	 * Counts word occurrences. The class keeps no state of its own, so the
	 * result depends only on the list passed in (referential transparency);
	 * every {@link OutputUnit} creates its own instance.
	 */
	private static class Combiner {

		/**
		 * Counts how often each word occurs in the list.
		 *
		 * @param wordList words to count; duplicates are expected
		 * @return a new map from each distinct word to its number of occurrences
		 */
		public HashMap<String, Integer> reduce(List<String> wordList){

			HashMap<String, Integer> map = new HashMap<String, Integer>();
			for (String word : wordList) {
				Integer count = map.get(word);
				map.put(word, count == null ? 1 : count + 1);
			}
			return map;
		}
	}
}